-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[clang][CodeGen] Fix sub-optimal clang CodeGen for __atomic_test_and_set #160098
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang Author: Sirui Mu (Lancern) ChangesClang CodeGen for bool f(void *ptr) {
return __atomic_test_and_set(ptr, __ATOMIC_RELAXED);
} %1 = atomicrmw xchg ptr %0, i8 1 monotonic, align 1
%tobool = icmp ne i8 %1, 0
store i1 %tobool, ptr %atomic-temp, align 1 which could lead to suboptimal binary code, for example on x86_64: f:
mov al, 1
xchg byte ptr [rdi], al
test al, al
setne al
setne byte ptr [rsp - 1]
ret The last I'm quite conservative about the codegen in this patch. Vanilla gcc actually emits simpler code for bool f(void *ptr) {
return __atomic_test_and_set(ptr, __ATOMIC_RELAXED);
} f:
mov eax, 1
xchg al, BYTE PTR [rdi]
ret It seems like gcc assumes Related to #121943 . Full diff: https://github.com/llvm/llvm-project/pull/160098.diff 2 Files Affected:
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 9106c4cd8e139..d44f8e1ea40eb 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -735,8 +735,10 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
CGF.Builder.getInt8(1), Order, Scope, E);
RMWI->setVolatile(E->isVolatile());
llvm::Value *Result = CGF.Builder.CreateIsNotNull(RMWI, "tobool");
- auto *I = CGF.Builder.CreateStore(Result, Dest);
- CGF.addInstToCurrentSourceAtom(I, Result);
+ llvm::Value *ExtResult =
+ CGF.Builder.CreateZExt(Result, CGF.Builder.getInt8Ty(), "tobool.zext");
+ auto *I = CGF.Builder.CreateStore(ExtResult, Dest);
+ CGF.addInstToCurrentSourceAtom(I, ExtResult);
return;
}
diff --git a/clang/test/CodeGen/atomic-test-and-set.c b/clang/test/CodeGen/atomic-test-and-set.c
index 39d4cef16b21d..6438094567f33 100644
--- a/clang/test/CodeGen/atomic-test-and-set.c
+++ b/clang/test/CodeGen/atomic-test-and-set.c
@@ -81,7 +81,8 @@ void clear_dynamic(char *ptr, int order) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -99,7 +100,8 @@ void test_and_set_relaxed(char *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acquire, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -117,7 +119,8 @@ void test_and_set_consume(char *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acquire, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -135,7 +138,8 @@ void test_and_set_acquire(char *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 release, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -153,7 +157,8 @@ void test_and_set_release(char *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acq_rel, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -171,7 +176,8 @@ void test_and_set_acq_rel(char *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 seq_cst, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -200,27 +206,32 @@ void test_and_set_seq_cst(char *ptr) {
// CHECK: [[MONOTONIC]]:
// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP2]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE:.*]]
// CHECK: [[ACQUIRE]]:
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acquire, align 1
// CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne i8 [[TMP3]], 0
-// CHECK-NEXT: store i1 [[TOBOOL1]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT1:%.*]] = zext i1 [[TOBOOL1]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT1]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
// CHECK: [[RELEASE]]:
// CHECK-NEXT: [[TMP4:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 release, align 1
// CHECK-NEXT: [[TOBOOL2:%.*]] = icmp ne i8 [[TMP4]], 0
-// CHECK-NEXT: store i1 [[TOBOOL2]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT2:%.*]] = zext i1 [[TOBOOL2]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT2]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
// CHECK: [[ACQREL]]:
// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acq_rel, align 1
// CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i8 [[TMP5]], 0
-// CHECK-NEXT: store i1 [[TOBOOL3]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT3:%.*]] = zext i1 [[TOBOOL3]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT3]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
// CHECK: [[SEQCST]]:
// CHECK-NEXT: [[TMP6:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 seq_cst, align 1
// CHECK-NEXT: [[TOBOOL4:%.*]] = icmp ne i8 [[TMP6]], 0
-// CHECK-NEXT: store i1 [[TOBOOL4]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT4:%.*]] = zext i1 [[TOBOOL4]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT4]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
// CHECK: [[ATOMIC_CONTINUE]]:
// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
@@ -239,7 +250,8 @@ void test_and_set_dynamic(char *ptr, int order) {
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], ptr [[X]], i64 0, i64 0
// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw volatile xchg ptr [[ARRAYDECAY]], i8 1 seq_cst, align 4
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP0]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
// CHECK-NEXT: ret void
@@ -301,7 +313,8 @@ void clear_incomplete(struct incomplete *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 4
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -318,7 +331,8 @@ void test_and_set_int(int *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -335,7 +349,8 @@ void test_and_set_void(void *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
|
504bfa6
to
bfa16a7
Compare
clang/lib/CodeGen/CGAtomic.cpp
Outdated
@@ -735,8 +735,10 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, | |||
CGF.Builder.getInt8(1), Order, Scope, E); | |||
RMWI->setVolatile(E->isVolatile()); | |||
llvm::Value *Result = CGF.Builder.CreateIsNotNull(RMWI, "tobool"); | |||
auto *I = CGF.Builder.CreateStore(Result, Dest); | |||
CGF.addInstToCurrentSourceAtom(I, Result); | |||
llvm::Value *ExtResult = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For consistency with other code, this should use CodeGenFunction::EmitToMemory
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated.
Clang CodeGen for `__atomic_test_and_set` would emit a `store` instruction that stores an `i1` value: ```cpp bool f(bool *ptr) { return __atomic_test_and_set(ptr, __ATOMIC_RELAXED); } ``` ```llvm %1 = atomicrmw xchg ptr %0, i8 1 monotonic, align 1 %tobool = icmp ne i8 %1, 0 store i1 %tobool, ptr %atomic-temp, align 1 ``` which could lead to suboptimal binary code, for example on x86_64: ```asm f: mov al, 1 xchg byte ptr [rdi], al test al, al setne al setne byte ptr [rsp - 1] ret ``` This patch fixes this issue by first extending the `i1` value to an `i8` before the store. This effectively eliminates the last `setne` instruction in the binary code sequence.
bfa16a7
to
5ebd984
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…set (llvm#160098) Clang CodeGen for `__atomic_test_and_set` would emit a `store` instruction that stores an `i1` value: ```cpp bool f(void *ptr) { return __atomic_test_and_set(ptr, __ATOMIC_RELAXED); } ``` ```llvm %1 = atomicrmw xchg ptr %0, i8 1 monotonic, align 1 %tobool = icmp ne i8 %1, 0 store i1 %tobool, ptr %atomic-temp, align 1 ``` which could lead to suboptimal binary code, for example on x86_64: ```asm f: mov al, 1 xchg byte ptr [rdi], al test al, al setne al setne byte ptr [rsp - 1] ret ``` The last `setne` instruction is obviously redundant. This patch fixes this issue by first zero-extending `%tobool` to an `i8` before the store. This effectively eliminates the last `setne` instruction in the binary code sequence. The `test` and `setne` on `al` is kept still, though. ----- I'm quite conservative about the codegen in this patch. Vanilla gcc actually emits simpler code for `__atomic_test_and_set`: ```cpp bool f(void *ptr) { return __atomic_test_and_set(ptr, __ATOMIC_RELAXED); } ``` ```asm f: mov eax, 1 xchg al, BYTE PTR [rdi] ret ``` It seems like gcc assumes `ptr` would always point to a valid `bool` value as required by the ABI. I'm not sure if we should also make this assumption. Related to llvm#121943 .
Clang CodeGen for
__atomic_test_and_set
would emit astore
instruction that stores ani1
value:which could lead to suboptimal binary code, for example on x86_64:
The last
setne
instruction is obviously redundant. This patch fixes this issue by first zero-extending%tobool
to ani8
before the store. This effectively eliminates the lastsetne
instruction in the binary code sequence. Thetest
andsetne
onal
is kept still, though.I'm quite conservative about the codegen in this patch. Vanilla gcc actually emits simpler code for
__atomic_test_and_set
:It seems like gcc assumes
ptr
would always point to a validbool
value as required by the ABI. I'm not sure if we should also make this assumption.Related to #121943 .